0: Preparation
Defining the input and output files
# Clean the working environment
rm(list = ls())
knitr::opts_chunk$set(echo = TRUE)
# empirical_species <- "German Shepherd"
empirical_dog_breed <- Sys.getenv("empirical_dog_breed")
empirical_species <- paste(toupper(substring(unlist(strsplit(empirical_dog_breed, "_")), 1, 1)),
substring(unlist(strsplit(empirical_dog_breed, "_")), 2),
sep = "", collapse = " ")
# empirical_species <- "Labrador Retriever"
# document_title <- paste(empirical_species,"Pipeline Results - 50 N_e bottleneck scenario")
# document_title <- paste(empirical_species,"55 Gen Breed Formation - SNP chip: Post_bottleneck_generation - Mutations introduced - 50 N_e bottleneck scenario")
# document_title <- paste(empirical_species,"40 Gen Breed Formation - SNP chip: Post_bottleneck_generation - Mutations introduced - 50 N_e bottleneck scenario")
N_e_bottleneck <- as.numeric(Sys.getenv("N_e_bottleneck"))
n_simulated_generations_breed_formation <- as.numeric(Sys.getenv("n_simulated_generations_breed_formation"))
reference_population_for_snp_chip <- Sys.getenv("reference_population_for_snp_chip")
document_title <- paste0(empirical_species," ",n_simulated_generations_breed_formation," Gen Breed Formation - SNP chip: ",reference_population_for_snp_chip," - ",N_e_bottleneck," N_e bottleneck scenario")
document_sub <- Sys.getenv("document_sub_title")
# #
# # MAF_pruning_used <- TRUE
# MAF_pruning_used <- FALSE
#
# N_e <- 340
# # document_sub <- paste("MAF 0.05 used, but only for H_E-computation, N_e=", N_e)
# # document_sub <- paste("MAF 0.01 used, but only for H_E-computation, N_e=", N_e)
#
#
#
#
#
# if (MAF_pruning_used == FALSE) {
# document_sub <- paste("No MAF-based pruning used, N_e =", N_e)
#
#
# } else {
# document_sub <- paste("MAF-based pruning used, N_e =", N_e)
# }
############################################
# Parameters used for displaying the result
############################################
ROH_frequency_decimals <- 1
H_e_values_decimals <- 3
F_ROH_values_decimals <- 3
Window_lengths_decimals <- 2
####################################
# Defining the input files
####################################
Selection_strength_test_dir <- Sys.getenv("Selection_strength_test_dir")
Sweep_test_dir <- Sys.getenv("Sweep_test_dir")
###############
## Empirical ###
###############
### ROH hotspots ###
Empirical_data_ROH_hotspots_dir <- Sys.getenv("Empirical_data_ROH_hotspots_dir")
Empirical_data_autosome_ROH_freq_dir <- Sys.getenv("Empirical_data_autosome_ROH_freq_dir")
### Inbreeding coefficient ###
Empirical_data_F_ROH_dir <- Sys.getenv("Empirical_data_F_ROH_dir")
### Expected Heterozygosity distribution ###
Empirical_data_H_e_dir <- Sys.getenv("Empirical_data_H_e_dir")
###############
## Simulated ###
###############
### Causative variant ###
variant_freq_plots_dir <- Sys.getenv("variant_freq_plots_dir")
variant_position_dir <- Sys.getenv("variant_position_dir")
pruned_replicates_count_dir <- Sys.getenv("pruned_replicates_count_dir")
Selection_causative_variant_windows_dir <- Sys.getenv("Selection_causative_variant_windows_dir")
causative_variant_windows_H_e_dir <- Sys.getenv("causative_variant_windows_H_e_dir")
### ROH hotspots ###
Neutral_model_ROH_hotspots_dir <- Sys.getenv("Neutral_model_ROH_hotspots_dir")
Neutral_model_autosome_ROH_freq_dir <- Sys.getenv("Neutral_model_autosome_ROH_freq_dir")
Selection_model_ROH_hotspots_dir <- Sys.getenv("Selection_model_ROH_hotspots_dir")
Selection_model_autosome_ROH_freq_dir <- Sys.getenv("Selection_model_autosome_ROH_freq_dir")
### Inbreeding coefficient ###
Neutral_model_F_ROH_dir <- Sys.getenv("Neutral_model_F_ROH_dir")
Selection_model_F_ROH_dir <- Sys.getenv("Selection_model_F_ROH_dir")
### Expected Heterozygosity distribution ###
Neutral_model_H_e_dir <- Sys.getenv("Neutral_model_H_e_dir")
Selection_model_H_e_dir <- Sys.getenv("Selection_model_H_e_dir")
histogram_line_sizes <- 3
empirical_data_color <- "darkgreen"
neutral_model_color <- "blue"
selection_model_color <- "purple"
output_dir <- Sys.getenv("Pipeline_results_output_dir")
# Sys.getenv()
# # Set the working directory for notebook chunks
# knitr::opts_knit$set(root.dir = output_dir_sweep_test)
Loading libraries
if (!require(knitr)) install.packages("knitr", dependencies = TRUE)
## Loading required package: knitr
## Warning: package 'knitr' was built under R version 4.3.2
library(knitr)
if (!require(ggplot2)) install.packages("ggplot2", dependencies = TRUE)
## Loading required package: ggplot2
## Warning: package 'ggplot2' was built under R version 4.3.1
library(ggplot2)
if (!require(scatterplot3d)) install.packages("scatterplot3d", dependencies = TRUE)
## Loading required package: scatterplot3d
library(scatterplot3d) # For generating the 3d plots
if (!require(RColorBrewer)) install.packages("RColorBrewer", dependencies = TRUE)
## Loading required package: RColorBrewer
library(RColorBrewer) # For generating a color palette
Latex formatting function
Causative variant
Variant fixation
Fixation probability
Fixation time
# Function to determine the number of rows until fixation is reached
time_to_fixation <- function(data, threshold = 0.9) {
# Find the index of the first row where the allele frequency is 0.9 or higher
fixation_index <- which(data$allele_frequency >= threshold)[1]
# Return the number of rows until fixation is reached
return(fixation_index - 1)
}
Summary - Variant fixation
## [1] "/home/jonathan/results/Pipeline_results_MAF_0_05/Causative_variant_Fixation_summary.txt"
| s=0.1 |
34.5 |
53 |
37 |
72 |
| s=0.2 |
83.3 |
34 |
21 |
50 |
| s=0.3 |
83.3 |
22 |
11 |
33 |
| s=0.4 |
76.9 |
16 |
11 |
20 |
| s=0.5 |
66.7 |
12 |
9 |
17 |
| s=0.6 |
69.0 |
10 |
7 |
12 |
| s=0.7 |
66.7 |
8 |
6 |
10 |
| s=0.8 |
71.4 |
7 |
5 |
8 |
Causative variant windows
Variant position
Window lengths
Summary - Causative variant windows
## [1] "/home/jonathan/results/Pipeline_results_MAF_0_05/Causative_variant_windows_summary.txt"
| 2 |
s=0.2 |
1.46 |
82.3 |
35.5 |
100.0 |
84.3 |
| 3 |
s=0.3 |
2.25 |
79.1 |
18.1 |
100.0 |
80.3 |
| 7 |
s=0.7 |
2.45 |
74.1 |
23.1 |
100.0 |
75.2 |
| 1 |
s=0.1 |
2.49 |
59.0 |
17.9 |
99.1 |
61.0 |
| 6 |
s=0.6 |
3.02 |
80.7 |
17.9 |
100.0 |
82.0 |
| 4 |
s=0.4 |
3.04 |
81.7 |
8.5 |
100.0 |
82.3 |
| 5 |
s=0.5 |
3.12 |
67.0 |
17.9 |
100.0 |
68.1 |
| 8 |
s=0.8 |
4.45 |
82.4 |
12.9 |
100.0 |
83.1 |
Standard Error Confidence interval function
Confidence level <=> konfidensgrad
# Function to calculate standard error and its confidence interval
standard_error_confidence_interval_fun <- function(observed_data, confidence_level = 0.95) {
# if ( nrow(observed_data) > 1 ) {
if ( length(observed_data) > 1 ) {
# Calculate standard error
n <- length(observed_data)
standard_deviation <- sd(observed_data)
standard_error <- standard_deviation / sqrt(n - 1)
# Calculate confidence interval based on standard error
alpha <- (1 - confidence_level) / 2
margin_of_error <- qnorm(1 - alpha) * standard_error
mean_estimate <- mean(observed_data)
# Calculate the percentiles for the confidence interval
confidence_interval_lower_bound <- mean_estimate - margin_of_error # 2.5th percentile (2σ)
confidence_interval_upper_bound <- mean_estimate + margin_of_error # 97.5th percentile (2σ)
# Return confidence interval
return(c(confidence_interval_lower_bound, confidence_interval_upper_bound))
} else {
return(c(NA,NA))
}
}
# # Function to calculate bootstrap confidence intervals
# standard_error_confidence_interval_fun <- function(observed_data, n_bootstraps = 1000, confidence_level = 0.95) {
#
# # Function to calculate the mean for each bootstrap sample
# compute_bootstrap_mean_fun <- function(observed_data_urn) {
# bootstrap_dataset <- sample(observed_data_urn, replace = TRUE)
# bootstrap_estimate <- mean(bootstrap_dataset)
# return(bootstrap_estimate)
# }
#
# # Perform bootstrap resampling
# bootstrap_point_estimates <- replicate(n_bootstraps, compute_bootstrap_mean_fun(observed_data))
#
# # Calculate the percentiles for the confidence interval
# alpha <- (1 - confidence_level) / 2
# confidence_interval_lower_bound <- quantile(bootstrap_point_estimates, alpha) # 2.5th percentile
# confidence_interval_upper_bound <- quantile(bootstrap_point_estimates, 1 - alpha) # 97.5th percentile
#
# # Return the confidence interval
# return(c(confidence_interval_lower_bound, confidence_interval_upper_bound))
# }
1: ROH-Frequency
1.1 : Autosome ROH-frequencies
1.1.1 : Empirical - ROH frequency
1.1.2: Neutral Model - ROH frequency
1.1.3: Selection Model
1.1.4 Summary - ROH-hotspot threshold
## ROH-hotspot selection testing results://n
## [1] "/home/jonathan/results/Pipeline_results_MAF_0_05/ROH-hotspot_threshold_comparison.txt"
| Neutral |
52.0 |
| Empirical |
55.4 |
| s=0.1 |
64.6 |
| s=0.5 |
79.2 |
| s=0.3 |
81.8 |
| s=0.2 |
82.8 |
| s=0.7 |
85.8 |
| s=0.8 |
87.1 |
| s=0.4 |
87.5 |
| s=0.6 |
88.8 |
1.2 ROH-hotspots - ROH Frequency and Length
2: Inbreeding coefficient
2.1 Empirical data

## Overall Average Avg_F_ROH for labrador_retriever : 0.2333818 //n
2.2 Neutral Model

## Point estimate F_ROH across all 20 simulations: 0.2254513 //n
## [1] "Bootstrap 95% Confidence Interval: [0.210695324058223, 0.24020718182413]"
2.3 Selection Model

## Point estimate F_ROH across all 20 simulations for selection_model_s01_chr3 : 0.2324361 //n[1] "Bootstrap 95% Confidence Interval: [0.219446141689648, 0.245425997133882]"

## Point estimate F_ROH across all 20 simulations for selection_model_s02_chr3 : 0.2602741 //n[1] "Bootstrap 95% Confidence Interval: [0.247307214840647, 0.273240978100529]"

## Point estimate F_ROH across all 20 simulations for selection_model_s03_chr3 : 0.293561 //n[1] "Bootstrap 95% Confidence Interval: [0.267891390833455, 0.319230526813604]"

## Point estimate F_ROH across all 20 simulations for selection_model_s04_chr3 : 0.3024522 //n[1] "Bootstrap 95% Confidence Interval: [0.282208055417251, 0.322696370465102]"

## Point estimate F_ROH across all 20 simulations for selection_model_s05_chr3 : 0.3123284 //n[1] "Bootstrap 95% Confidence Interval: [0.291190019682684, 0.333466850905551]"

## Point estimate F_ROH across all 20 simulations for selection_model_s06_chr3 : 0.3255663 //n[1] "Bootstrap 95% Confidence Interval: [0.305588675205435, 0.345543948323977]"

## Point estimate F_ROH across all 20 simulations for selection_model_s07_chr3 : 0.3400056 //n[1] "Bootstrap 95% Confidence Interval: [0.309288678924456, 0.370722596369662]"

## Point estimate F_ROH across all 20 simulations for selection_model_s08_chr3 : 0.366789 //n[1] "Bootstrap 95% Confidence Interval: [0.332464701241699, 0.401113258758301]"
2.4 F_ROH summary
## [1] "/home/jonathan/results/Pipeline_results_MAF_0_05/F_ROH_comparison.txt"
| Neutral |
0.225 |
0.211 |
0.240 |
| s=0.1 |
0.232 |
0.219 |
0.245 |
| Empirical |
0.233 |
NA |
NA |
| s=0.2 |
0.260 |
0.247 |
0.273 |
| s=0.3 |
0.294 |
0.268 |
0.319 |
| s=0.4 |
0.302 |
0.282 |
0.323 |
| s=0.5 |
0.312 |
0.291 |
0.333 |
| s=0.6 |
0.326 |
0.306 |
0.346 |
| s=0.7 |
0.340 |
0.309 |
0.371 |
| s=0.8 |
0.367 |
0.332 |
0.401 |
3: Expected Heterozygosity
3.1 Empirical data
3.2 Neutral Model
3.3 Selection Model
3.4 Causative Variant Window
## Point estimate H_e across all 20 simulations for s01_chr3 : 0.2812145 //n[1] "Bootstrap 95% Confidence Interval: [0.222646677729938, 0.339782258855786]"
## Point estimate H_e across all 20 simulations for s02_chr3 : 0.1891011 //n[1] "Bootstrap 95% Confidence Interval: [0.123639281073536, 0.254562880359914]"
## Point estimate H_e across all 20 simulations for s03_chr3 : 0.2365339 //n[1] "Bootstrap 95% Confidence Interval: [0.162386232827551, 0.310681584306142]"
## Point estimate H_e across all 20 simulations for s04_chr3 : 0.2537652 //n[1] "Bootstrap 95% Confidence Interval: [0.166018863652156, 0.341511538138468]"
## Point estimate H_e across all 20 simulations for s05_chr3 : 0.3532439 //n[1] "Bootstrap 95% Confidence Interval: [0.274108222328155, 0.432379632844626]"
## Point estimate H_e across all 20 simulations for s06_chr3 : 0.2796745 //n[1] "Bootstrap 95% Confidence Interval: [0.188098900553001, 0.371250174395178]"
## Point estimate H_e across all 20 simulations for s07_chr3 : 0.3020188 //n[1] "Bootstrap 95% Confidence Interval: [0.223970872874214, 0.380066667347565]"
## Point estimate H_e across all 20 simulations for s08_chr3 : 0.2478038 //n[1] "Bootstrap 95% Confidence Interval: [0.184246267574859, 0.311361304621735]"
3.4 Genomewide Average H_e
| s=0.4 |
0.349 |
0.343 |
0.356 |
| s=0.3 |
0.350 |
0.344 |
0.355 |
| s=0.7 |
0.351 |
0.343 |
0.360 |
| s=0.6 |
0.352 |
0.343 |
0.360 |
| s=0.8 |
0.352 |
0.343 |
0.360 |
| s=0.2 |
0.354 |
0.351 |
0.357 |
| s=0.5 |
0.354 |
0.349 |
0.360 |
| Empirical |
0.355 |
NA |
NA |
| Neutral |
0.357 |
0.353 |
0.360 |
| s=0.1 |
0.358 |
0.354 |
0.362 |
3.5 Genomewide 5th percentile comparison - Expected Heterozygosity Summary
## [1] "/home/jonathan/results/Pipeline_results_MAF_0_05/Expected_Heterozygosity_Summary.txt"
| s=0.8 |
0.175 |
0.160 |
0.190 |
| s=0.7 |
0.180 |
0.166 |
0.194 |
| s=0.5 |
0.184 |
0.175 |
0.193 |
| s=0.6 |
0.186 |
0.173 |
0.200 |
| s=0.4 |
0.187 |
0.176 |
0.199 |
| s=0.3 |
0.191 |
0.179 |
0.202 |
| s=0.2 |
0.202 |
0.196 |
0.209 |
| Empirical |
0.209 |
NA |
NA |
| s=0.1 |
0.213 |
0.206 |
0.220 |
| Neutral |
0.213 |
0.206 |
0.220 |
4: Summary
4.0 General comparison
4.0.1 ROH-hotspot threshold comparison
##
## ROH-hotspot threshold comparison between the different datasets
| Neutral |
52.0 |
| Empirical |
55.4 |
| s=0.1 |
64.6 |
| s=0.5 |
79.2 |
| s=0.3 |
81.8 |
| s=0.2 |
82.8 |
| s=0.7 |
85.8 |
| s=0.8 |
87.1 |
| s=0.4 |
87.5 |
| s=0.6 |
88.8 |
4.0.2 F_ROH comparison
## Models where empirical F_ROH is within CI:
## [1] "s=0.1" "Neutral"
##
## Models where empirical F_ROH is outside CI:
## [1] "s=0.2" "s=0.3" "s=0.4" "s=0.5" "s=0.6" "s=0.7" "s=0.8"
## png
## 2
| Neutral |
0.225 |
0.211 |
0.240 |
| s=0.1 |
0.232 |
0.219 |
0.245 |
| Empirical |
0.233 |
NA |
NA |
| s=0.2 |
0.260 |
0.247 |
0.273 |
| s=0.3 |
0.294 |
0.268 |
0.319 |
| s=0.4 |
0.302 |
0.282 |
0.323 |
| s=0.5 |
0.312 |
0.291 |
0.333 |
| s=0.6 |
0.326 |
0.306 |
0.346 |
| s=0.7 |
0.340 |
0.309 |
0.371 |
| s=0.8 |
0.367 |
0.332 |
0.401 |
4.1: Hotspot comparison
4.1.1: Selection test (Sweep test)
## [1] "Selection test results"
## [1] "ROH-hotspot windows with an mean H_e Value lower or equal to the lower confidence interval of the fifth percentile of the neutral model are classified as being under selection"
## [1] "5th percentile of the neutral model is: 0.205921"
| Hotspot_chr1_window_1 |
Yes |
0.124 |
| Hotspot_chr28_window_1 |
Yes |
0.197 |
| Hotspot_chr24_window_1 |
No |
0.217 |
| Hotspot_chr13_window_1 |
No |
0.225 |
| Hotspot_chr6_window_1 |
No |
0.263 |
| Hotspot_chr15_window_1 |
No |
0.268 |
| Hotspot_chr11_window_2 |
No |
0.284 |
| Hotspot_chr11_window_1 |
No |
0.289 |
| Hotspot_chr8_window_1 |
No |
0.369 |
## [1] "/home/jonathan/results/Pipeline_results_MAF_0_05/ROH_hotspots_Selection_testing_neutral_model_H_E_threshold_0.206.csv.txt"
## [1] "ROH-hotspots under selection:"
| Hotspot_chr1_window_1 |
Yes |
0.124 |
| Hotspot_chr28_window_1 |
Yes |
0.197 |
4.1.2: Selection Strength Test (Sweep test)
Hotspot - Causative Window - Comparison
| s=0.8 |
4.45 |
82.4 |
0.248 |
No |
| s=0.7 |
2.45 |
74.1 |
0.302 |
No |
| s=0.6 |
3.02 |
80.7 |
0.280 |
No |
| s=0.5 |
3.12 |
67.0 |
0.353 |
No |
| s=0.4 |
3.04 |
81.7 |
0.254 |
No |
| s=0.3 |
2.25 |
79.1 |
0.237 |
No |
| s=0.2 |
1.46 |
82.3 |
0.189 |
Yes |
| s=0.1 |
2.49 |
59.0 |
0.281 |
No |
| Hotspot_chr28_window_1 |
1.40 |
75.0 |
0.197 |
Yes |
| Hotspot_chr1_window_1 |
0.20 |
61.9 |
0.124 |
Yes |
# plot_title <- paste("Length And Average ROH % Comparison - ROH Hotspots & Causative Windows")
plot_title <- paste("ROH Hotspot(s) & Causative Windows comparison")
# Modify the labels by removing "Hotspot_" prefix and "_window" suffix from hotspot models
Hotspots_and_Causative_windows_comparison_sorted$Label <- ifelse(
grepl("Hotspot", Hotspots_and_Causative_windows_comparison_sorted$Model),
gsub("Hotspot_|_window", "", Hotspots_and_Causative_windows_comparison_sorted$Model),
Hotspots_and_Causative_windows_comparison_sorted$Model
)
# Removing the "s=" part from the selection coefficients for the plot displayal
Hotspots_and_Causative_windows_comparison_sorted$Label <- gsub("^s=", "", Hotspots_and_Causative_windows_comparison_sorted$Label)
# Create a new column for identifying the type
Hotspots_and_Causative_windows_comparison_sorted$Type <- ifelse(
grepl("Hotspot", Hotspots_and_Causative_windows_comparison_sorted$Model),
"Hotspot",
"Selection Coefficient"
)
# # Create the 3D scatter plot
# s3d <- scatterplot3d(
# Hotspots_and_Causative_windows_comparison_sorted$Lengths_Mb,
# Hotspots_and_Causative_windows_comparison_sorted$ROH_Freq,
# Hotspots_and_Causative_windows_comparison_sorted$H_e,
# color = ifelse(Hotspots_and_Causative_windows_comparison_sorted$Type == "Hotspot", "red", "blue"),
# pch = 19, # Solid circle
# xlab = "Lengths",
# ylab = "ROH Frequency",
# zlab = "H_e",
# main = plot_title
# )
#
# # Add labels to the points
# s3d.coords <- s3d$xyz.convert(
# Hotspots_and_Causative_windows_comparison_sorted$Lengths_Mb,
# Hotspots_and_Causative_windows_comparison_sorted$ROH_Freq,
# Hotspots_and_Causative_windows_comparison_sorted$H_e
# )
# text(s3d.coords$x, s3d.coords$y,
# labels = Hotspots_and_Causative_windows_comparison_sorted$Label,
# pos = 3, cex = 0.5) # pos=3 means above
# Create the 3D scatter plot
s3d <- scatterplot3d(
Hotspots_and_Causative_windows_comparison_sorted$ROH_Freq,
Hotspots_and_Causative_windows_comparison_sorted$Lengths_Mb,
Hotspots_and_Causative_windows_comparison_sorted$H_e,
color = ifelse(Hotspots_and_Causative_windows_comparison_sorted$Type == "Hotspot", "red", "blue"),
pch = 19, # Solid circle
xlab = "Avg ROH-frequency (%)",
ylab = "Length (Mb)",
zlab = "Avg H_e",
main = plot_title
)
# )
# Add labels to the points
s3d.coords <- s3d$xyz.convert(
Hotspots_and_Causative_windows_comparison_sorted$ROH_Freq,
Hotspots_and_Causative_windows_comparison_sorted$Lengths_Mb,
Hotspots_and_Causative_windows_comparison_sorted$H_e
)
text(s3d.coords$x, s3d.coords$y,
labels = Hotspots_and_Causative_windows_comparison_sorted$Label,
pos = 3, cex = 0.5) # pos=3 means above
# Test med skuggor
setwd(output_dir)
# plot_title <- paste("Length And Average ROH % Comparison - ROH Hotspots & Causative Windows")
plot_title <- paste("ROH Hotspot(s) & Causative Windows comparison")
# Modify the labels by removing "Hotspot_" prefix and "_window" suffix from hotspot models
Hotspots_and_Causative_windows_comparison_sorted$Label <- ifelse(
grepl("Hotspot", Hotspots_and_Causative_windows_comparison_sorted$Model),
gsub("Hotspot_|_window", "", Hotspots_and_Causative_windows_comparison_sorted$Model),
Hotspots_and_Causative_windows_comparison_sorted$Model
)
# Removing the "s=" part from the selection coefficients for the plot display
Hotspots_and_Causative_windows_comparison_sorted$Label <- gsub("^s=", "", Hotspots_and_Causative_windows_comparison_sorted$Label)
# Create a new column for identifying the type
Hotspots_and_Causative_windows_comparison_sorted$Type <- ifelse(
grepl("Hotspot", Hotspots_and_Causative_windows_comparison_sorted$Model),
"Hotspot",
"Selection Coefficient"
)
# Generate a color palette for the hotspots
hotspot_models <- unique(Hotspots_and_Causative_windows_comparison_sorted$Model[Hotspots_and_Causative_windows_comparison_sorted$Type == "Hotspot"])
num_hotspots <- length(hotspot_models)
# Choose the color palette based on the number of hotspots
if (num_hotspots == 2) {
color_palette_name <- "Set2"
} else {
color_palette_name <- "Set3"
}
# Get the colors for the hotspots
hotspot_colors <- setNames(brewer.pal(n = num_hotspots, name = color_palette_name), hotspot_models)
## Warning in brewer.pal(n = num_hotspots, name = color_palette_name): minimal value for n is 3, returning requested palette with 3 different levels
# Assign colors to each point
Hotspots_and_Causative_windows_comparison_sorted$Color <- ifelse(
Hotspots_and_Causative_windows_comparison_sorted$Type == "Hotspot",
hotspot_colors[Hotspots_and_Causative_windows_comparison_sorted$Model],
"blue"
)
x_value <- Hotspots_and_Causative_windows_comparison_sorted$ROH_Freq
x_axis_label <- "Avg ROH-frequency (%)"
y_value <- Hotspots_and_Causative_windows_comparison_sorted$Lengths_Mb
y_axis_label <- "Length (Mb)"
z_value <- Hotspots_and_Causative_windows_comparison_sorted$H_e
z_axis_label <- "Avg H_e"
# x_value <- Hotspots_and_Causative_windows_comparison_sorted$Lengths_Mb
# x_axis_label <- "Length (Mb)"
# y_value <- Hotspots_and_Causative_windows_comparison_sorted$H_e
# y_axis_label <- "Avg H_e"
# z_value <- Hotspots_and_Causative_windows_comparison_sorted$ROH_Freq
# z_axis_label <- "Avg ROH-frequency (%)"
x_value <- Hotspots_and_Causative_windows_comparison_sorted$ROH_Freq
x_axis_label <- "Avg ROH-frequency (%)"
y_value <- Hotspots_and_Causative_windows_comparison_sorted$H_e
y_axis_label <- "Avg H_e"
z_value <- Hotspots_and_Causative_windows_comparison_sorted$Lengths_Mb
z_axis_label <- "Length (Mb)"
# x_value <- Hotspots_and_Causative_windows_comparison_sorted$H_e
# x_axis_label <- "Avg H_e"
# y_value <- Hotspots_and_Causative_windows_comparison_sorted$ROH_Freq
# y_axis_label <- "Avg ROH-frequency (%)"
# z_value <- Hotspots_and_Causative_windows_comparison_sorted$Lengths_Mb
# z_axis_label <- "Length (Mb)"
# # Create and save the 3D scatter plot as a PNG file
# png(filename = "3dplot_Hotspot_Causative_Window_Comparison.png", width = 1920, height = 1080, res = 300)
# png(filename = "3dplot_Hotspot_Causative_Window_Comparison.png",width = 800, height = 600, res = 300)
# Create the 3D scatter plot
s3d <- scatterplot3d(
x_value ,
y_value,
z_value,
color = Hotspots_and_Causative_windows_comparison_sorted$Color,
pch = 19, # Solid circle
xlab = x_axis_label,
ylab = y_axis_label,
zlab = z_axis_label,
main = plot_title
)
# Add shadows on the x-y plane (z = 0)
s3d$points3d(
x_value,
y_value,
rep(0, nrow(Hotspots_and_Causative_windows_comparison_sorted)), # Set z to 0 for shadow
col = adjustcolor(Hotspots_and_Causative_windows_comparison_sorted$Color, alpha.f = 0.3), # Semi-transparent shadows
pch = 19
)
# Convert coordinates for adding labels
s3d.coords <- s3d$xyz.convert(
x_value,
y_value,
z_value
)
# Add labels to the original points
text(s3d.coords$x, s3d.coords$y,
labels = Hotspots_and_Causative_windows_comparison_sorted$Label,
pos = 3, cex = 0.5) # pos=3 means above

# # Close the graphics device
# dev.off()
Fixar överlappande labels
# # Increase plot size
# par(mar = c(5, 5, 5, 5)) # Adjust margins if needed
#
# s3d <- scatterplot3d(
# # x = Hotspots_and_Causative_windows_comparison_sorted$ROH_Freq,
# # y = Hotspots_and_Causative_windows_comparison_sorted$Lengths_Mb,
# # z = Hotspots_and_Causative_windows_comparison_sorted$H_e,
#
# # x = Hotspots_and_Causative_windows_comparison_sorted$H_e,
# # y = Hotspots_and_Causative_windows_comparison_sorted$Lengths_Mb,
# # z = Hotspots_and_Causative_windows_comparison_sorted$ROH_Freq ,
#
# # x = Hotspots_and_Causative_windows_comparison_sorted$Lengths_Mb ,
# # y = Hotspots_and_Causative_windows_comparison_sorted$H_e,
# # z = Hotspots_and_Causative_windows_comparison_sorted$ROH_Freq ,
#
# x = Hotspots_and_Causative_windows_comparison_sorted$ROH_Freq,
# y = Hotspots_and_Causative_windows_comparison_sorted$H_e,
# z = Hotspots_and_Causative_windows_comparison_sorted$Lengths_Mb ,
#
#
# color = ifelse(Hotspots_and_Causative_windows_comparison_sorted$Type == "Hotspot", "red", "blue"),
# pch = 19,
# # xlab = "Avg ROH-frequency (%)",
# # ylab = "Length (Mb)",
# # zlab = "H_e",
#
# # xlab = "Length (Mb) ",
# # ylab = "H_e",
# # zlab = "Avg ROH-frequency (%)",
#
# # xlab = "Length (Mb) ",
# # ylab = "H_e",
# # zlab = "Avg ROH-frequency (%)",
#
# xlab = "Avg ROH-frequency (%)",
# ylab = "H_e",
# zlab = "Length (Mb)",
#
#
#
# main = plot_title
# )
# # # Add labels to the points
# # s3d.coords <- s3d$xyz.convert(
# # Hotspots_and_Causative_windows_comparison_sorted$ROH_Freq,
# # Hotspots_and_Causative_windows_comparison_sorted$Lengths_Mb,
# # Hotspots_and_Causative_windows_comparison_sorted$H_e
# # )
#
#
# # Add labels to the points
# s3d.coords <- s3d$xyz.convert(
# Hotspots_and_Causative_windows_comparison_sorted$ROH_Freq,
# Hotspots_and_Causative_windows_comparison_sorted$H_e,
# Hotspots_and_Causative_windows_comparison_sorted$Lengths_Mb
# )
#
# #
# # # Add jitter to the label positions
# # text(s3d.coords$x + runif(nrow(Hotspots_and_Causative_windows_comparison_sorted), -0.04, 0.04),
# # s3d.coords$y + runif(nrow(Hotspots_and_Causative_windows_comparison_sorted), -0.04, 0.04),
# # labels = Hotspots_and_Causative_windows_comparison_sorted$Label,
# # pos = 3, cex = 0.5)
# #
# #
# text(s3d.coords$x, s3d.coords$y,
# labels = Hotspots_and_Causative_windows_comparison_sorted$Label,
# pos = 3, cex = 0.5) # pos=3 means above
jittering on all axis
# setwd(output_dir)
#
# # Identify indices for selection and non-selection points based on labels starting with '0'
# selection_indices <- grepl("^0", Hotspots_and_Causative_windows_comparison_sorted$Label)
# non_selection_indices <- !selection_indices
#
# # windows(width = 1920 / 96, height = 1080 / 96) # 96 DPI is default
# # # # Create and save the 3D scatter plot as a PNG file
# # png(filename = "3dplot_Hotspot_Causative_Window_Comparison.png", width = 1920, height = 1080)
#
# # Create the 3D scatter plot with both selection and non-selection points
# s3d <- scatterplot3d(
# Hotspots_and_Causative_windows_comparison_sorted$ROH_Freq,
# Hotspots_and_Causative_windows_comparison_sorted$Lengths_Mb,
# Hotspots_and_Causative_windows_comparison_sorted$H_e,
# color = ifelse(selection_indices, "blue", "red"), # Blue for selection, Red for hotspots
# pch = 19, # Solid circle
# xlab = "Avg ROH-frequency (%)",
# ylab = "Length (Mb)",
# zlab = "H_e",
# main = plot_title
# )
# # Convert 3D coordinates for labeling
# s3d.coords <- s3d$xyz.convert(
# Hotspots_and_Causative_windows_comparison_sorted$ROH_Freq,
# Hotspots_and_Causative_windows_comparison_sorted$Lengths_Mb,
# Hotspots_and_Causative_windows_comparison_sorted$H_e
# )
# # Subset data for non-selection points
# non_selection_data <- Hotspots_and_Causative_windows_comparison_sorted[non_selection_indices, ]
# # s3d.coords$x[non_selection_indices]
# # 7.888889 6.278889 6.715556 7.655556 6.674444 5.723333 6.143333 5.773333 6.212222
# # custom_jitter_x_axis_hotspots <- c(0,0,-0.05,0,0.1,-0.4,0,0,0) # Adjust as needed
# custom_jitter_x_axis_hotspots <- c(0,0,0,0,0,0,0,0,0) # Adjust as needed
#
# # s3d.coords$y[non_selection_indices]
# #3.7711111 2.4511111 1.9444444 1.5644444 2.5355556 3.1066667 2.6066667 3.1466667 0.8977778
# # custom_jitter_y_axis_hotspots <- c(0,-0.7,-0.65,0,0,-0.3,0,0.1,0) # Adjust as needed
# custom_jitter_y_axis_hotspots <- c(0,0,0,0,0,0,0,0,0) # Adjust as needed
#
# # Add labels for non-selection points without jitter
# text(s3d.coords$x[non_selection_indices]+ custom_jitter_x_axis_hotspots,
# s3d.coords$y[non_selection_indices]+ custom_jitter_y_axis_hotspots,
# labels = Hotspots_and_Causative_windows_comparison_sorted$Label[non_selection_indices],
# pos = 3, cex = 0.7)
#
#
#
# selection_data <- Hotspots_and_Causative_windows_comparison_sorted[selection_indices, ]
# custom_jitter_x_axis_sel_coeff <- c(0.2,0.05,0.025,0,-0.05,0,0)
# # custom_jitter_x_axis_sel_coeff <- c(0,0,0,0,0,0,0)
# custom_jitter_y_axis_sel_coeff <- c(-0.25,0,-0.57,-0.1,-0.6,0,0)
# # custom_jitter_y_axis_sel_coeff <- c(0,0,0,0,0,0,0)
# # Apply jitter to selection point labels
# label_jitter <- runif(sum(selection_indices), 0, 0.2) # Small random jitter
# # Add labels for selection points with jitter
# text(s3d.coords$x[selection_indices] + custom_jitter_x_axis_sel_coeff,
# s3d.coords$y[selection_indices] + custom_jitter_y_axis_sel_coeff,
# labels = Hotspots_and_Causative_windows_comparison_sorted$Label[selection_indices],
# pos = 3, cex = 0.7)
# # Close the graphics device
# dev.off()
# kable(selection_data,row.names = FALSE)
# kable(non_selection_data,row.names = FALSE)
#Jitter z-axis
# # Create the 3D scatter plot with both selection and non-selection points
# s3d <- scatterplot3d(
# Hotspots_and_Causative_windows_comparison_sorted$ROH_Freq,
# Hotspots_and_Causative_windows_comparison_sorted$Lengths_Mb,
# Hotspots_and_Causative_windows_comparison_sorted$H_e,
# color = ifelse(selection_indices, "blue", "red"), # Blue for selection, Red for hotspots
# pch = 19, # Solid circle
# xlab = "Avg ROH-frequency (%)",
# ylab = "Length (Mb)",
# zlab = "H_e",
# main = plot_title
# )
#
# # Convert 3D coordinates for labeling
# s3d.coords <- s3d$xyz.convert(
# Hotspots_and_Causative_windows_comparison_sorted$ROH_Freq,
# Hotspots_and_Causative_windows_comparison_sorted$Lengths_Mb,
# Hotspots_and_Causative_windows_comparison_sorted$H_e
# )
#
# # Generate small random jitter for the y-axis (simulating z-axis movement)
# y_jitter <- runif(sum(selection_indices), -0.02, 0.02)
#
# # Adjust y-coordinates for selection labels only
# jittered_y <- s3d.coords$y[selection_indices] + y_jitter
#
# # Add labels for selection points with y-axis adjustment
# text(s3d.coords$x[selection_indices],
# jittered_y,
# labels = Hotspots_and_Causative_windows_comparison_sorted$Label[selection_indices],
# pos = 3, cex = 0.7)
# #
# # Add labels for non-selection points without jitter
# text(s3d.coords$x[non_selection_indices],
# s3d.coords$y[non_selection_indices],
# labels = Hotspots_and_Causative_windows_comparison_sorted$Label[non_selection_indices],
# pos = 3, cex = 0.7)
# Sort the data frame based on average fixation time, in descending order
kable(Hotspots_and_Causative_windows_comparison[order(-Hotspots_and_Causative_windows_comparison$ROH_Freq), ])
| 10 |
s=0.8 |
4.45 |
82.4 |
0.248 |
No |
| 4 |
s=0.2 |
1.46 |
82.3 |
0.189 |
Yes |
| 6 |
s=0.4 |
3.04 |
81.7 |
0.254 |
No |
| 8 |
s=0.6 |
3.02 |
80.7 |
0.280 |
No |
| 5 |
s=0.3 |
2.25 |
79.1 |
0.237 |
No |
| 2 |
Hotspot_chr28_window_1 |
1.40 |
75.0 |
0.197 |
Yes |
| 9 |
s=0.7 |
2.45 |
74.1 |
0.302 |
No |
| 7 |
s=0.5 |
3.12 |
67.0 |
0.353 |
No |
| 1 |
Hotspot_chr1_window_1 |
0.20 |
61.9 |
0.124 |
Yes |
| 3 |
s=0.1 |
2.49 |
59.0 |
0.281 |
No |
kable(Hotspots_and_Causative_windows_comparison[order(-Hotspots_and_Causative_windows_comparison$H_e), ])
| 7 |
s=0.5 |
3.12 |
67.0 |
0.353 |
No |
| 9 |
s=0.7 |
2.45 |
74.1 |
0.302 |
No |
| 3 |
s=0.1 |
2.49 |
59.0 |
0.281 |
No |
| 8 |
s=0.6 |
3.02 |
80.7 |
0.280 |
No |
| 6 |
s=0.4 |
3.04 |
81.7 |
0.254 |
No |
| 10 |
s=0.8 |
4.45 |
82.4 |
0.248 |
No |
| 5 |
s=0.3 |
2.25 |
79.1 |
0.237 |
No |
| 2 |
Hotspot_chr28_window_1 |
1.40 |
75.0 |
0.197 |
Yes |
| 4 |
s=0.2 |
1.46 |
82.3 |
0.189 |
Yes |
| 1 |
Hotspot_chr1_window_1 |
0.20 |
61.9 |
0.124 |
Yes |
# # Load the required packages
# library(rgl)
#
# # Identify indices for selection and non-selection points based on labels starting with '0'
# selection_indices <- grepl("^0", Hotspots_and_Causative_windows_comparison_sorted$Label)
# non_selection_indices <- !selection_indices
#
# # Open a new 3D plotting device
# open3d()
#
# # Plot with rgl
# plot3d(
# Hotspots_and_Causative_windows_comparison_sorted$ROH_Freq,
# Hotspots_and_Causative_windows_comparison_sorted$Lengths_Mb,
# Hotspots_and_Causative_windows_comparison_sorted$H_e,
# col = ifelse(selection_indices, "blue", "red"), # Blue for selection, Red for hotspots
# type = 's', # Use points
# xlab = "Avg ROH-frequency (%)",
# ylab = "Length (Mb)",
# zlab = "H_e",
# main = plot_title
# )
#
# # Convert 3D coordinates for labeling
# s3d_coords <- cbind(
# Hotspots_and_Causative_windows_comparison_sorted$ROH_Freq,
# Hotspots_and_Causative_windows_comparison_sorted$Lengths_Mb,
# Hotspots_and_Causative_windows_comparison_sorted$H_e
# )
#
# # Add labels for non-selection points with custom jitter
# text3d(
# s3d_coords[non_selection_indices, ] + cbind(custom_jitter_x_axis_hotspots, custom_jitter_y_axis_hotspots, 0),
# texts = Hotspots_and_Causative_windows_comparison_sorted$Label[non_selection_indices],
# col = "red", # Color of the text
# cex = 0.7
# )
#
# # Add labels for selection points with custom jitter
# text3d(
# s3d_coords[selection_indices, ] + cbind(custom_jitter_x_axis_sel_coeff, custom_jitter_y_axis_sel_coeff, label_jitter),
# texts = Hotspots_and_Causative_windows_comparison_sorted$Label[selection_indices],
# col = "blue", # Color of the text
# cex = 0.7
# )
# # Save the 3D plot as a PNG file
# rgl.postscript("3dplot_Hotspot_Causative_Window_Comparison.png", fmt = "png")
# # Close the 3D device
# rgl.close()
Visualizing and scaling
# Min-max-scaling normalization function, that normalizes a vector of values
min_max_normalization_fun <- function(x) {
return((x - min(x)) / (max(x) - min(x)))
}
### Min max scaling ###
# Normalize Lengths_Mb
Hotspots_and_Causative_windows_comparison_sorted$Lengths_Mb_Normalized <- min_max_normalization_fun(Hotspots_and_Causative_windows_comparison_sorted$Lengths_Mb)
# Normalize ROH Frequency
Hotspots_and_Causative_windows_comparison_sorted$ROH_Freq_Normalized <- min_max_normalization_fun(Hotspots_and_Causative_windows_comparison_sorted$ROH_Freq)
# Normalize H_e
Hotspots_and_Causative_windows_comparison_sorted$H_e_Normalized <- min_max_normalization_fun(Hotspots_and_Causative_windows_comparison_sorted$H_e)
# Modify the labels by removing "Hotspot_" prefix and "_window" suffix from hotspot models
Hotspots_and_Causative_windows_comparison_sorted$Label <- ifelse(
grepl("Hotspot", Hotspots_and_Causative_windows_comparison_sorted$Model),
gsub("Hotspot_|_window", "", Hotspots_and_Causative_windows_comparison_sorted$Model),
Hotspots_and_Causative_windows_comparison_sorted$Model
)
# Create a new column for identifying the type
Hotspots_and_Causative_windows_comparison_sorted$Type <- ifelse(
grepl("Hotspot", Hotspots_and_Causative_windows_comparison_sorted$Model),
"Hotspot",
"Selection Coefficient"
)
plot_title <- paste("Normalized ROH Hotspot(s) & Causative Windows comparison")
# Create the 3D scatter plot
s3d <- scatterplot3d(
Hotspots_and_Causative_windows_comparison_sorted$Lengths_Mb_Normalized,
Hotspots_and_Causative_windows_comparison_sorted$ROH_Freq_Normalized,
Hotspots_and_Causative_windows_comparison_sorted$H_e_Normalized,
color = ifelse(Hotspots_and_Causative_windows_comparison_sorted$Type == "Hotspot", "red", "blue"),
pch = 19, # Solid circle
xlab = "Normalized Lengths",
ylab = "Normalized Mean ROH Frequency",
zlab = "Normalized H_e",
main = plot_title
)
# Add labels to the points
s3d.coords <- s3d$xyz.convert(
Hotspots_and_Causative_windows_comparison_sorted$Lengths_Mb_Normalized,
Hotspots_and_Causative_windows_comparison_sorted$ROH_Freq_Normalized,
Hotspots_and_Causative_windows_comparison_sorted$H_e_Normalized
)
text(s3d.coords$x, s3d.coords$y,
labels = Hotspots_and_Causative_windows_comparison_sorted$Label,
pos = 3, cex = 0.5) # pos=3 means above


5 Visualizing Expected Heterozygoisty
5.1 Bootstrap CI for mean 5th percentile H_e
## TRUE

## Models where empirical H_e is within CI for Hotspot_chr1_window_1 :
## [1] "s=0.2"
##
## Models where empirical H_e is outside CI for Hotspot_chr1_window_1 :
## character(0)
## TRUE

## Models where empirical H_e is within CI for Hotspot_chr28_window_1 :
## [1] "s=0.2"
##
## Models where empirical H_e is outside CI for Hotspot_chr28_window_1 :
## character(0)
## TRUE

## Models where empirical H_e is within CI for Hotspot_chr24_window_1 :
## [1] "s=0.2"
##
## Models where empirical H_e is outside CI for Hotspot_chr24_window_1 :
## character(0)
## TRUE

## Models where empirical H_e is within CI for Hotspot_chr13_window_1 :
## [1] "s=0.2"
##
## Models where empirical H_e is outside CI for Hotspot_chr13_window_1 :
## character(0)
## FALSE

## Models where empirical H_e is within CI for Hotspot_chr6_window_1 :
## character(0)
##
## Models where empirical H_e is outside CI for Hotspot_chr6_window_1 :
## [1] "s=0.2"
## FALSE

## Models where empirical H_e is within CI for Hotspot_chr15_window_1 :
## character(0)
##
## Models where empirical H_e is outside CI for Hotspot_chr15_window_1 :
## [1] "s=0.2"
## FALSE

## Models where empirical H_e is within CI for Hotspot_chr11_window_2 :
## character(0)
##
## Models where empirical H_e is outside CI for Hotspot_chr11_window_2 :
## [1] "s=0.2"
## FALSE

## Models where empirical H_e is within CI for Hotspot_chr11_window_1 :
## character(0)
##
## Models where empirical H_e is outside CI for Hotspot_chr11_window_1 :
## [1] "s=0.2"
## FALSE

## Models where empirical H_e is within CI for Hotspot_chr8_window_1 :
## character(0)
##
## Models where empirical H_e is outside CI for Hotspot_chr8_window_1 :
## [1] "s=0.2"
5.2 5th percentile of the extreme H_e values